library(bnlearn)
library(gRain)
source('functions.R')
data <- read.table("heart.csv", sep = ",", header = T)
names <- names(data)
names[1] <- c("age")
colnames(data) <- names
data$sex[data$sex == 0] = "female"          
data$sex[data$sex == 1] = "male"                      

data$age[data$age < 60] = "young"  
data$age[data$age >= 60 &  data$age != "young"] = "old"  

data$chol[data$chol <= 220] = "normal"
data$chol[data$chol > 220 & data$chol <= 300 & data$chol != "normal"] = "high"
data$chol[data$chol > 300 & data$chol != "normal" & data$chol != "high"] = "very.high"

data$trestbps[data$trestbps <= 130] = "less.than.130"          
data$trestbps[data$trestbps > 130 & data$trestbps != "less.than.130"] = "more.than.130"           

data$thalach[data$thalach <= 120] = "less.than.120.bpm"           
data$thalach[data$thalach > 120 & data$thalach != "less.than.120.bpm"] = "more.than.120.bpm"            

data$oldpeak[data$oldpeak <= 0.1] = "less.than.0.1.mV"           
data$oldpeak[data$oldpeak > 0.1 & data$oldpeak <= 0.75 & data$oldpeak != "less.than.0.1.mV" ] = "0.1..0.75.mV"           
data$oldpeak[data$oldpeak > 0.75 & data$oldpeak != "less.than.0.1.mV" & data$oldpeak != "0.1..0.75.mV" ] = "more.than.0.75.mV"            
 
data$cp[data$cp == 0 ] = "no.chest.pain"                      
data$cp[data$cp == 3 ] = "no.chest.pain"                    
data$cp[data$cp == 1 ] = "chest.pain"                     
data$cp[data$cp == 2 ] = "chest.pain"                     

data$fbs[data$fbs == 0] = "normal"                    
data$fbs[data$fbs == 1] = "high"                    

data$exang[data$exang == 0] = "no.angina"                      
data$exang[data$exang == 1] = "angina"                      

data$slope[data$slope == 0] = "upsloping"                
data$slope[data$slope == 1] = "flat"                                
data$slope[data$slope == 2] = "downsloping"                                   

data$target[data$target == 1] = 3
data$target[data$target == 0] = "disease"               
data$target[data$target == 3] = "not.disease"               

data$ca <- NULL  
data$thal <- NULL 
data$restecg <- NULL
summary(data)
##      age                sex                 cp              trestbps        
##  Length:303         Length:303         Length:303         Length:303        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      chol               fbs              thalach             exang          
##  Length:303         Length:303         Length:303         Length:303        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    oldpeak             slope              target         
##  Length:303         Length:303         Length:303        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
data[] <- lapply(data, as.factor)
set.seed(0)
bl.tabu.1 <- tabu(data, score = "bic", tabu = 20, max.tabu = 20, max.iter = 1000000)

set.seed(0)
bl.tabu.2 <- tabu(data, score = "k2", tabu = 20, max.tabu = 20, max.iter = 1000000)

set.seed(0)
bl.tabu.3 <- tabu(data, score = "aic", tabu = 20, max.tabu = 20, max.iter = 1000000)

set.seed(0)
bl.tabu.4 <- tabu(data, score = "loglik", tabu = 20, max.tabu = 20, max.iter = 1000000)

set.seed(0)
bl.hc.1 <- hc(data, score = "bic", max.iter = 1000000, restart = 20)

set.seed(0)
bl.hc.2 <- hc(data, score = "k2", max.iter = 1000000, restart = 20)

set.seed(0)
bl.hc.3 <- hc(data, score = "aic", max.iter = 1000000, restart = 20)

set.seed(0)
bl.hc.4 <- hc(data, score = "loglik", max.iter = 1000000, restart = 20)

Análisis de las redes obtenidas

score(bl.tabu.1, data, type = "bic")   
## [1] -2191.457
score(bl.tabu.2, data, type = "bic") 
## [1] -2208.063
score(bl.tabu.3, data, type = "bic") 
## [1] -2257.161
score(bl.tabu.4, data, type = "bic")
## [1] -21351.56
score(bl.hc.1, data, type = "bic")    
## [1] -2191.457
score(bl.hc.2, data, type = "bic") 
## [1] -2200.527
score(bl.hc.3, data, type = "bic") 
## [1] -2257.161
score(bl.hc.4, data, type = "bic") 
## [1] -21351.56

Búsqueda Tabu

plot(bl.tabu.1)

plot(bl.tabu.2)

plot(bl.tabu.3)

plot(bl.tabu.4)

Búsqueda hill-climbing

plot(bl.hc.1)

plot(bl.hc.2)

plot(bl.hc.3)

plot(bl.hc.4)

Analisis de la evolución del score de los algoritmos en combinación con las diferentes métricas.

set.seed(0)
score.tabu.1 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- tabu(data, score = "bic", tabu = 20, max.tabu = 20, max.iter = n)
  score.tabu.1 <- c(score.tabu.1, score(net, data, type = "bic") )
}

set.seed(0)
score.tabu.2 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- tabu(data, score = "aic", tabu = 20, max.tabu = 20, max.iter = n)
  score.tabu.2 <- c(score.tabu.2, score(net, data, type = "aic") )
}
set.seed(0)
score.tabu.3 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- tabu(data, score = "loglik", tabu = 20, max.tabu = 20, max.iter = n)
  score.tabu.3 <- c(score.tabu.3, score(net, data, type = "loglik") )
}
set.seed(0)
score.tabu.4 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- tabu(data, score = "k2", tabu = 20, max.tabu = 20, max.iter = n)
  score.tabu.4 <- c(score.tabu.4, score(net, data, type = "k2") )
}

set.seed(0)
score.hc.1 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- hc(data, score = "bic", max.iter = n, restart = 20)
  score.hc.1 <- c(score.hc.1, score(net, data, type = "bic") )
}
set.seed(0)
score.hc.2 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- hc(data, score = "aic", max.iter = n, restart = 20)
  score.hc.2 <- c(score.hc.2, score(net, data, type = "aic") )
}

set.seed(0)
score.hc.3 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- hc(data, score = "loglik", max.iter = n, restart = 20)
  score.hc.3 <- c(score.hc.3, score(net, data, type = "loglik") )
}
set.seed(0)
score.hc.4 = c()
for(n in seq(from = 1, to = 100, by = 1)){
  net <- hc(data, score = "k2", max.iter = n, restart = 20)
  score.hc.4 <- c(score.hc.4, score(net, data, type = "k2") )
}
plot(score.tabu.1, main = "Tabu search + BIC", ylab = "BIC score", xlab = "Iteration", type = "o", col = "black")

plot(score.tabu.2, main = "Tabu search + AIC", ylab = "AIC score", xlab = "Iteration", type = "o", col = "black")

plot(score.tabu.3, main = "Tabu search + LOGLIK", ylab = "LOGLIK score", xlab = "Iteration", type = "o", col = "black")

plot(score.tabu.4, main = "Tabu search + K2", ylab = "K2 score", xlab = "Iteration", type = "o", col = "black")

plot(score.hc.1, main = "Hill-Climbing + BIC", ylab = "BIC score", xlab = "Iteration",type = "o", col = "black")

plot(score.hc.2, main = "Hill-Climbing + AIC", ylab = "AIC score", xlab = "Iteration",type = "o", col = "black")

plot(score.hc.3, main = "LOGLIK score in Hill-Climbing search", ylab = "LOGLK score", xlab = "Iteration",type = "o", col = "black")

plot(score.hc.4, main = "K2 score in Hill-Climbing search", ylab = "K2 score", xlab = "Iteration",type = "o", col = "black")

Ajuste de las mejores redes

bl.tabu.1 <- reverse.arc(bl.tabu.1, "cp", "target")
bl.tabu.1 <- reverse.arc(bl.tabu.1, "age", "oldpeak")
bl.tabu.1 <- drop.arc(bl.tabu.1, "age", "sex")
bl.tabu.1 <- reverse.arc(bl.tabu.1, "target", "sex")

bl.tabu.3 <- drop.arc(bl.tabu.3, "age", "sex")
bl.tabu.3 <- reverse.arc(bl.tabu.3, "age", "oldpeak")
bl.tabu.3 <- reverse.arc(bl.tabu.3, "fbs", "trestbps")

bl.hc.3 <- drop.arc(bl.hc.3, "age", "sex")
bl.hc.3 <- reverse.arc(bl.hc.3, "fbs", "trestbps")
bl.hc.3 <- reverse.arc(bl.hc.3, "age", "oldpeak")
bl.hc.3 <- reverse.arc(bl.hc.3, "cp", "oldpeak")
bl.hc.3 <- reverse.arc(bl.hc.3, "sex", "slope")
bl.hc.3 <- reverse.arc(bl.hc.3, "sex", "target")
plot(bl.tabu.1, highlight = list(nodes = "target"))

plot(bl.tabu.3, highlight = list(nodes = "target"))

plot(bl.hc.3, highlight = list(nodes = "target"))

Comparación de la estructura de las redes usando la distancia de Hamming.

bnlearn::hamming(bl.tabu.3, bl.tabu.1)
## [1] 11
bnlearn::hamming(bl.hc.3, bl.tabu.1)
## [1] 11
bnlearn::hamming(bl.tabu.3, bl.hc.3)
## [1] 0
graphviz.compare(bl.tabu.3, bl.tabu.1)

graphviz.compare(bl.hc.3, bl.tabu.1)

graphviz.compare(bl.tabu.3, bl.hc.3)

Redes creadas mediante bnstruct y el estudio de la independencia.

Después de la obtención de las primeras redes se ajustaron en base relaciones lógicas.

Red desarrollada en base a un algoritmo completo (Silander-Myllymaki) que usa como score el Bayesian Information Criterium.

description.1 <- paste('[fbs|sex]',
                       '[chol|sex]',
                       '[sex]',
                       '[trestbps|fbs:sex:age]',
                       '[age]',
                       '[thalach|age:target:exang]',
                       '[exang|age:target:oldpeak]',
                       '[oldpeak|target:trestbps]',
                       '[cp|target]',
                       '[slope|trestbps:target]',
                       '[target|age]',
                        sep = '')

bl.expert.1 <- model2network(description.1)
plot(bl.expert.1, highlight = list(nodes = "target"))

Red desarrollada en base al estudio de la independencia.

description.2 <- paste('[age][sex][fbs][thalach]',
                       '[trestbps|fbs]',
                       '[slope|age:trestbps:target]',
                       '[oldpeak|target:slope]',
                       '[chol|age]',
                       '[exang|target:oldpeak:cp]',
                       '[target|chol:sex:fbs]',
                       '[cp|target:age:fbs]', sep = '')
bl.expert.2 <- model2network(description.2)
plot(bl.expert.2, highlight = list(nodes = "target"))

Los scores obtenidos mediante el criterio de información Bayesiana aumentarón tras la re-estructuración de las redes.

score(bl.tabu.1, data, type = "bic")
## [1] -2200.638
score(bl.tabu.3, data, type = "bic")
## [1] -2290.816
score(bl.hc.3, data, type = "bic")
## [1] -2294.673
score(bl.expert.1,data, type = "bic")
## [1] -2289.853
score(bl.expert.2,data, type = "bic")
## [1] -2298.644

Número de conexioens entre cada vez.

length(arcs(bl.tabu.1))
## [1] 20
length(arcs(bl.tabu.3))
## [1] 38
length(arcs(bl.hc.3))
## [1] 38
length(arcs(bl.expert.1))
## [1] 34
length(arcs(bl.expert.2))
## [1] 32

Ajuste de parámetros de las redes

Ajustamos los parámetros de la red mediante el método de Bayes.

bl.tabu.1.fit <- bn.fit(bl.tabu.1, data, method = "bayes")
bl.tabu.3.fit <- bn.fit(bl.tabu.3, data, method = "bayes")
bl.hc.3.fit <- bn.fit(bl.hc.3, data, method = "bayes")
bl.expert.1.fit <- bn.fit(bl.expert.1, data, method = "bayes")
bl.expert.2.fit <- bn.fit(bl.expert.2, data, method = "bayes")

Inferencia aproximada mediante logistic sample

A continuación se va a comparar el rendimiento de las diferentes redes frente a diferentes situaciones.

  • Daño cardiaco en función de la dolor de pecho y angina: Son resultados interesantes, no tiene porque haber daño cardiaco a partir de estas observaciones.
library(ggplot2)
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain"), method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain"), method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain"), method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain"), method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain"), method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage with angina and chest pain") + 
      geom_text(aes(label = probability), size = 5, vjust = -1) +
  ylim(0, 1)+ theme_minimal()

  • Si añadimos historial clinico sobre colesterol e hipertensión: Los resultados de las dos segundas redes son bastante interesantes ya que implica que seguramente haya daño cardiaco cuando tenemos tensión elevada y colesterol muy alto.
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (exang == "angina"  & cp == "chest.pain" & chol == "very.high" & trestbps == "more.than.130"), method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "very.high" & trestbps == "more.than.130"), method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "very.high" & trestbps == "more.than.130"), method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (exang == "angina"  & cp == "chest.pain" & chol == "very.high" & trestbps == "more.than.130"), method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "very.high" & trestbps == "more.than.130"), method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage with angina, chest pain, hypertension and high cholesterol (> 300 mg/dL) ") + 
      geom_text(aes(label = probability), size = 5,  vjust = -1) +
  ylim(0, 1)+ theme_minimal()

  • Si disminuimos los niveles de colesterol se puede ver que la probabildiad disminuye de forma considerable lo que resulta lógico.
set.seed(0)
prob.1 <-cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (exang == "angina"  & cp == "chest.pain" & chol == "high" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.2 <-cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "high" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.3 <-cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "high" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.4 <-cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (exang == "angina"  & cp == "chest.pain" & chol == "high" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.5 <-cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "high" & trestbps == "more.than.130"),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage with angina, chest pain, hypertension and normal cholesterol (220 - 300 mg/dL)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

set.seed(0)
prob.1 <-cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (exang == "angina"  & cp == "chest.pain" & chol == "normal" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.2 <-cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "normal" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.3 <-cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "normal" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.4 <-cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (exang == "angina"  & cp == "chest.pain" & chol == "normal" & trestbps == "more.than.130"),method = "ls", n = 1000000)

prob.5 <-cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (exang == "angina"  & cp == "chest.pain" & chol == "normal" & trestbps == "more.than.130"),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage with angina, chest pain, hypertension and normal cholesterol (< 220 mg/dL)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

En general se puede apreciar que las redes TAIC y HAIC esta fuertemente influenciada por los niveles de colesterol. La red 2 parece adaptarse mejor a la realidad.

  • Daño cardiaco en función de las alteraciones en el electrocardiograma:
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "less.than.0.1.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "less.than.0.1.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak < 0.1 mv & form upsloping)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "0.1..0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "0.1..0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "0.1..0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "0.1..0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "0.1..0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak 0.1-0.75 mv & form upsloping)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

Un valor alto del segmento ST después del ejercicio se asocia a un incremento bastante importante en la probabildiad de tener daño cardiaco en todas las redes.

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "upsloping" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form upsloping)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

Si repetimos lo mismo con la pendiente del segmento ST se puede ver que una pendiente plana aumenta la probabilidad de riesgo cardiaco.

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "less.than.0.1.mV"  & slope == "flat" ), method = "ls",n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "less.than.0.1.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "flat" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak < 0.1 mV & form flat)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "less.than.0.1.mV"  & slope == "downsloping" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "downsloping" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "downsloping" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "less.than.0.1.mV"  & slope == "downsloping" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "less.than.0.1.mV"  & slope == "downsloping" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak 0.1-0.75 mV & form downsloping)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

Si combinamos un incremento en el voltaje del segmento ST con una pendiente plana tenemos la mayor probabildiad de tener daño cardiaco

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat)") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

  • Combinando las alteraciones en el electrocardigrama con los sintomas clínicos de dolor en el pecho y angina, sorprendetemente la probabildiad de tener daño cardiaco disminuye lo que parece bastante contradictorio.
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & exang == "angina"  & cp == "chest.pain" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & exang == "angina"  & cp == "chest.pain" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & exang == "angina"  & cp == "chest.pain" ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & exang == "angina"  & cp == "chest.pain" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & exang == "angina"  & cp == "chest.pain" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), chest pain and angina") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

  • Combinando las alteraciones en el electrocardigrama con colesterol y que se trate de un hombre tenemos que la probabildiad se incrementa
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male"  ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male"  ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male"  ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), cholesterol (>220 mg/dL) and sex male") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

Para una mujer el riesgo disminuye.

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" ),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female"  ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female"  ),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" ),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female"  ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), cholesterol (>220 mg/dL) and sex female") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

  • Incorporando a lo anterior la variable edad:
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "young"),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "young" ),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male"  & age == "young"),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "young"),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "young" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), cholesterol (>220 mg/dL), sex male and young") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

En hombres mayores se puede ver que en la primera red aumenta de forma considerable mientras que en el resto disminuye en difernete grado, en este caso la primera red parece ser la más realista.

set.seed(0)
prob.1<- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "old"),method = "ls", n = 1000000)

prob.2<- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "old" ),method = "ls", n = 1000000)

prob.3<- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male"  & age == "old"),method = "ls", n = 1000000)

prob.4<- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "old"),method = "ls", n = 1000000)

prob.5<- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "male" & age == "old" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Heart damage based on ECG (oldpeak > 0.75 mV & slope flat), chol (>220 mg/dL) and old male") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

Para una mujer el riesgo parece ser independiente de la edad en algunas redes y aunque en general afecta a todas se puede ver que no tiene una fuerte influencia. La primera red de nuevo parece dar resultados más realistas.

set.seed(0)
prob.1<- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "young"),method = "ls", n = 1000000)

prob.2<- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "young" ),method = "ls", n = 1000000)

prob.3<- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female"  & age == "young"),method = "ls", n = 1000000)

prob.4<- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "young"),method = "ls", n = 1000000)

prob.5<- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "young" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), cholesterol (>220 mg/dL), sex female and young") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

set.seed(0)
prob.1<- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "old"),method = "ls", n = 1000000)

prob.2<- cpquery(bl.tabu.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "old" ),method = "ls", n = 1000000)

prob.3<- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female"  & age == "old"),method = "ls", n = 1000000)

prob.4<- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "old"),method = "ls", n = 1000000)

prob.5<- cpquery(bl.expert.2.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & chol != "normal" & sex == "female" & age == "old" ),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Heart damage based on ECG (oldpeak > 0.75 mV & slope flat), chol (>220 mg/dL) and old female") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

  • Es interesante tmabién lo que pasa al incorporar la taquicardia, el nerviosismo del paciente causa de la ansidad puede alterar las pruebas y quizás disminuir el riesgo, no obstante habría que tomarlo con cautela, puede tratarse de una variable irrelevante ya que las personas se pueden poner nerviosas al llegar al hospital o creer que estan (independientemente de que lo estén) sufriendo un ataque cardiaco.
set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat"  & cp == "chest.pain" & exang == "angina" & thalach == "less.than.120.bpm"),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"),evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & cp == "chest.pain" & exang == "angina" & thalach == "less.than.120.bpm"),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat"  & cp == "chest.pain" & exang == "angina" & thalach == "less.than.120.bpm"),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat"  & cp == "chest.pain" & exang == "angina" & thalach == "less.than.120.bpm"),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"),evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & cp == "chest.pain" & exang == "angina" & thalach == "less.than.120.bpm"),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), chest pain, angina and less than 120 bpm") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

set.seed(0)
prob.1 <- cpquery(bl.tabu.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat"  & cp == "chest.pain" & exang == "angina" & thalach == "more.than.120.bpm"),method = "ls", n = 1000000)

prob.2 <- cpquery(bl.tabu.3.fit, event = (target == "disease"),evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & cp == "chest.pain" & exang == "angina" & thalach == "more.than.120.bpm"),method = "ls", n = 1000000)

prob.3 <- cpquery(bl.hc.3.fit, event = (target == "disease"), evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat"  & cp == "chest.pain" & exang == "angina" & thalach == "more.than.120.bpm"),method = "ls", n = 1000000)

prob.4 <- cpquery(bl.expert.1.fit, event = (target == "disease"),  evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat"  & cp == "chest.pain" & exang == "angina" & thalach == "more.than.120.bpm"),method = "ls", n = 1000000)

prob.5 <- cpquery(bl.expert.2.fit, event = (target == "disease"),evidence = (oldpeak == "more.than.0.75.mV"  & slope == "flat" & cp == "chest.pain" & exang == "angina" & thalach == "more.than.120.bpm"),method = "ls", n = 1000000)

network <- c("TBIC", "TAIC", "HAIC", "IND", "SM")
probability <- c(prob.1, prob.2, prob.3, prob.4, prob.5)
probability <- sapply(probability, round,3) 

df <- data.frame(network, probability)

g <- ggplot(df, aes(network, probability, fill = network))
g + geom_bar(stat="identity", width = 0.5) + 
      labs(title="Bayessian networks", 
           subtitle="Probability of heart damage based on electrocardiogram alterations (oldpeak > 0.75 mV & form flat), chest pain, angina and thachycardia") + 
    geom_text(aes(label = probability), size = 5,  vjust = -1) +
    ylim(0, 1)+ theme_minimal()

CONCLUSIÓN:Sin ninguna duda la red IND ha sido la que ha dado resultados más lógicos en base al conocimiento actual del tema, por lo tanto se usará esta red para la inferencia exacta.

INFERENCIA EXACTA

En general las probabildiades obtenidas mediante este método han sido mucho más bajas.

library(gRain)
gr.net <- as.grain(bl.expert.1.fit)   # IND net
gr.net.c<- compile(gr.net)
gr.net.c
## Independence network: Compiled: TRUE Propagated: FALSE 
##   Nodes: chr [1:11] "age" "chol" "cp" "exang" "fbs" "oldpeak" "sex" "slope" ...

La probabilidad de observar una angina y daño cardiaco según el modelo es:

nodes  <- c("target", "exang")
evidence <- c("disease", "angina")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
pEvidence(gr.net.c)
## [1] 0.2528061
gr.net.c <- retractEvidence(gr.net.c, nodes)

De observar dolor de pecho y daño cardiaco:

nodes  <- c("target", "cp")
evidence <- c("disease", "chest.pain")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
pEvidence(gr.net.c)
## [1] 0.08963816
gr.net.c <- retractEvidence(gr.net.c, nodes)

Colesterol y daño cardiaco

nodes  <- c("target", "chol")
evidence <- c("disease", "normal")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
chol.normal <- pEvidence(gr.net.c)
gr.net.c <- retractEvidence(gr.net.c, nodes)
chol.normal
## [1] 0.1488667
nodes  <- c("target", "chol")
evidence <- c("disease", "high")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
chol.moderate <- pEvidence(gr.net.c)
gr.net.c <- retractEvidence(gr.net.c, nodes)
chol.moderate
## [1] 0.2417835
nodes  <- c("target", "chol")
evidence <- c("disease", "very.high")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
chol.high <- pEvidence(gr.net.c)
gr.net.c <- retractEvidence(gr.net.c, nodes)
chol.high
## [1] 0.06494186

Riesgo relativo de daño cardiaco cuando los niveles de colesterol están entre 220 y 300 mg/dL

chol.moderate/chol.normal
## [1] 1.624161

Riesgo relativo de daño cardiaco cuando los niveles de colesterol son superiores a 300 mg/dL

chol.high/chol.normal
## [1] 0.4362416

Influencia del sexo.

nodes  <- c("target", "sex")
evidence <- c("disease", "male")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
male.prob <- pEvidence(gr.net.c)
gr.net.c <- retractEvidence(gr.net.c, nodes)
male.prob
## [1] 0.3109716
nodes  <- c("target", "sex")
evidence <- c("disease", "female")
gr.net.c <- setEvidence(object = gr.net.c, nodes = nodes,
                        states = evidence)
female.prob <- pEvidence(gr.net.c)
gr.net.c <- retractEvidence(gr.net.c, nodes)
female.prob
## [1] 0.1446205

Riesgo relativo hombres / mujeres

male.prob/female.prob
## [1] 2.150259

Para saber cual es el estado más probable de daño cardiaco cuando tenemos dolor de pecho y angina

disease <- querygrain(gr.net.c, type = "joint", nodes = c("exang", "cp", "target"))
disease[,,cp="chest.pain"]
##              exang
## target            angina  no.angina
##   disease     0.04973983 0.03989832
##   not.disease 0.05097219 0.31169228

Con alteraciones en el electrocardiograma es más probable que haya daño en el corazón cuando tenemos un electrocardiograma con un pendiente plana en la onda ST y un depresión de dicho segmento de 0.75 mV.

disease <- querygrain(gr.net.c, type = "joint", nodes = c("oldpeak", "slope", "target"))
disease
## , , slope = downsloping
## 
##              oldpeak
## target        0.1..0.75.mV less.than.0.1.mV more.than.0.75.mV
##   disease      0.008966331        0.0245184        0.08379446
##   not.disease  0.070704857        0.1661251        0.11641605
## 
## , , slope = flat
## 
##              oldpeak
## target        0.1..0.75.mV less.than.0.1.mV more.than.0.75.mV
##   disease       0.02209586       0.06107776        0.21574580
##   not.disease   0.03362642       0.07609459        0.05110187
## 
## , , slope = upsloping
## 
##              oldpeak
## target        0.1..0.75.mV less.than.0.1.mV more.than.0.75.mV
##   disease      0.002836835      0.007908976        0.02864770
##   not.disease  0.005929641      0.014221850        0.01018751
## 
## attr(,"class")
## [1] "parray" "array"

Con colesterol alto y alteraciones en la variable oldpeak es interesante lo que sale.

disease <- querygrain(gr.net.c, type = "joint", nodes = c("chol","oldpeak", "target"))
disease
## , , chol = high
## 
##              oldpeak
## target        0.1..0.75.mV less.than.0.1.mV more.than.0.75.mV
##   disease       0.01801045       0.04966102        0.17411207
##   not.disease   0.05862567       0.13612939        0.09416317
## 
## , , chol = normal
## 
##              oldpeak
## target        0.1..0.75.mV less.than.0.1.mV more.than.0.75.mV
##   disease       0.01108629       0.03057121        0.10720922
##   not.disease   0.03608078       0.08381031        0.05799658
## 
## , , chol = very.high
## 
##              oldpeak
## target        0.1..0.75.mV less.than.0.1.mV more.than.0.75.mV
##   disease      0.004802284       0.01327291        0.04686666
##   not.disease  0.015554481       0.03650183        0.02554569
## 
## attr(,"class")
## [1] "parray" "array"

RED FINAL

library(Rgraphviz)
all.arcs <- as.data.frame(arcs(bl.expert.1))
target.arcs <- as.matrix(all.arcs[all.arcs$from == "target" & all.arcs$to != "thalach",])

hight <- list(nodes = c(node = "target"), fill = "red",lty = 5, textCol = "white", arcs = target.arcs, col = "red")
pp <- graphviz.plot(bl.expert.1, highlight = hight, render = FALSE)
nodeRenderInfo(pp) <- list(fill = c(exang = "orange", cp = "orange", oldpeak = "orange", slope = "orange")) 

renderGraph(pp)